knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, fig.align = 'center') knitr::opts_knit$set(root.dir = "/mnt/raid5/Personal/tangchao/project/Nanopore/BarcodeDecomplex") knitr::opts_knit$set(base.dir = "/mnt/raid5/Personal/tangchao/project/Nanopore/BarcodeDecomplex")
library(caret) library(pbapply) library(parallel) library(multiROC) library(doParallel) library(data.table) library(ggplot2) library(ggpubr)
library(doParallel) cl <- makePSOCKcluster(20) registerDoParallel(cl)
SigTab <- readRDS("./analysis/07.VirusClassification/03.Merge/00.BarcodeSignal/BarcodeSignal.Rds") read2gene <- readRDS("./analysis/07.VirusClassification/03.Merge/00.BarcodeSignal/Reads2Barcode.Rds") setkey(read2gene, read) read2gene[Barcode %in% c("RTA-12", "RTA-13", "RTA-14", "RTA-15", "RTA-16", "RTA-17", "RTA-00", "RTA-01", "RTA-02", "RTA-04"), BarcodeLength := "L20"] read2gene[Barcode %in% c("RTA-24", "RTA-25", "RTA-26", "RTA-28", "RTA-29", "RTA-27", "RTA-30", "RTA-31", "RTA-32"), BarcodeLength := "L22"] read2gene[Barcode %in% c("RTA-03", "RTA-08", "RTA-10", "RTA-11", "RTA-18", "RTA-19", "RTA-20", "RTA-21", "RTA-22", "RTA-23"), BarcodeLength := "L24"] read2gene[Barcode %in% c("RTA-33", "RTA-37", "RTA-34", "RTA-35", "RTA-36", "RTA-38", "RTA-39", "RTA-40", "RTA-41"), BarcodeLength := "L26"] read2gene[Barcode %in% c("RTA-42", "RTA-43", "RTA-44", "RTA-45", "RTA-46", "RTA-47", "RTA-05", "RTA-06", "RTA-07", "RTA-09"), BarcodeLength := "L28"] BarocdeN <- read2gene[, .N, c("Barcode", "BarcodeLength")][order(N, decreasing = T)]
my_compa <- list(c("L20", "L22"), c("L22", "L24"), c("L20", "L24"), c("L24", "L26"), c("L26", "L28"))
ggplot(BarocdeN, aes(x = BarcodeLength, y = N)) + geom_boxplot() + geom_jitter(height = 0) + theme_bw(base_size = 15) + stat_compare_means(comparisons = my_compa, method = "t.test")
ggplot(BarocdeN, aes(x = BarcodeLength, y = log10(N))) + geom_boxplot(outlier.shape = NA) + geom_jitter(height = 0) + theme_bw(base_size = 15) + stat_compare_means(comparisons = my_compa, method = "t.test")
lapply(BarocdeN[, sort(unique(BarcodeLength))], function(i) { rbt2u <- BarocdeN[BarcodeLength == i][1:5, Barcode] r2g <- read2gene[Barcode %in% rbt2u] set.seed(9560) TrainingReads <- r2g[, .SD[sample(.N, 10000), ], Barcode] TestReads <- r2g[!read %in% TrainingReads$read, ] TestReads[, Class := factor(Barcode)] TrainingData <- SigTab[TrainingReads[, read], ] stopifnot(identical(row.names(TrainingData), TrainingReads$read)) TrainingData$Class <- as.factor(TrainingReads$Barcode) TestData <- SigTab[TestReads[, read], ] stopifnot(identical(row.names(TestData), TestReads$read)) TestData$Class <- as.factor(TestReads$Barcode) out <- paste0("./analysis/13.BarcodeLengthCompare/Version1/01.ModelingData/Barcode_", i, ".RData") save(TrainingReads, TestReads, TrainingData, TestData, file = out) })
rbt2u <- BarocdeN[, .SD[which.max(N), ], BarcodeLength][, Barcode] r2g <- read2gene[Barcode %in% rbt2u] set.seed(9560) TrainingReads <- r2g[, .SD[sample(.N, 10000), ], Barcode] TestReads <- r2g[!read %in% TrainingReads$read, ] TestReads[, Class := factor(Barcode)] TrainingData <- SigTab[TrainingReads[, read], ] stopifnot(identical(row.names(TrainingData), TrainingReads$read)) TrainingData$Class <- as.factor(TrainingReads$Barcode) TestData <- SigTab[TestReads[, read], ] stopifnot(identical(row.names(TestData), TestReads$read)) TestData$Class <- as.factor(TestReads$Barcode) out <- paste0("./analysis/13.BarcodeLengthCompare/Version1/01.ModelingData/Barcode_Mix.RData") save(TrainingReads, TestReads, TrainingData, TestData, file = out)
fitControl <- trainControl(method = "repeatedcv", number = 10, repeats = 10)
for(i in gsub("Barcode_", "", gsub(".RData", "", list.files("./analysis/13.BarcodeLengthCompare/Version1/01.ModelingData")))) { load(paste0("./analysis/13.BarcodeLengthCompare/Version1/01.ModelingData/Barcode_", i, ".RData")) rm(list = c("TrainingReads", "TestReads", "TestData")); gc() set.seed(825) rf <- train(Class ~ ., data = TrainingData, preProc = c("center", "scale", "YeoJohnson", "nzv"), method = "rf", trControl = fitControl, verbose = FALSE, tuneGrid = expand.grid(mtry = 35), # tuneLength = 10, metric = "Accuracy", allowParallel = TRUE) out <- paste0("./analysis/13.BarcodeLengthCompare/Version1/02.Models/Barcode_", i, ".Rds") saveRDS(rf, file = out) rm(list = c("TrainingData", "rf", "out")); gc() }
stopCluster(cl)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.